{
  "nbformat": 4,
  "nbformat_minor": 5,
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.0"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Economic Crimes by Category in Latin America (2020-2023)\n",
        "## Python Analysis Notebook\n",
        "\n",
        "**Author:** de la Serna, Juan Moises (International University of La Rioja)  \n",
        "**DOI:** https://doi.org/10.7910/DVN/8FXZOJ  \n",
        "**Date:** 2026-03-20 | **License:** CC0 1.0\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Standard library\n",
        "import warnings\n",
        "\n",
        "# Third-party\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import seaborn as sns\n",
        "\n",
        "# NOTE(review): this silences every warning category for the rest of the session,\n",
        "# including pandas/seaborn deprecation notices; consider narrowing the filter.\n",
        "warnings.filterwarnings('ignore')\n",
        "\n",
        "# Default resolution for figures created after this point.\n",
        "plt.rcParams['figure.dpi'] = 150\n",
        "print('Libraries loaded.')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load the two tab-separated data files used by this notebook.\n",
        "DATA_FILE = 'delitos_economicos_latinoamerica_2020_2023.tab'\n",
        "DATA_FILE_2 = 'delitos_economicos_latinoamerica_2020_2023-2.tab'\n",
        "df = pd.read_csv(DATA_FILE, sep='\\t')\n",
        "# NOTE(review): df2 is loaded but not referenced by any later cell in this notebook.\n",
        "df2 = pd.read_csv(DATA_FILE_2, sep='\\t')\n",
        "\n",
        "# Fail early, with a readable message, if a column the figure cells rely on is absent.\n",
        "REQUIRED_COLUMNS = ['country', 'year', 'cpi_score', 'tax_evasion_gdp_pct',\n",
        "                    'ml_risk_level', 'homicide_rate', 'fraud_companies_pct']\n",
        "missing_columns = [c for c in REQUIRED_COLUMNS if c not in df.columns]\n",
        "assert not missing_columns, f'Missing expected columns: {missing_columns}'\n",
        "\n",
        "# Quick overview of the main table.\n",
        "print('Shape:', df.shape)\n",
        "print('Countries:', df['country'].nunique())\n",
        "print('Years:', sorted(df['year'].unique()))\n",
        "df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Figure 1: CPI by country (2023)\n",
        "# Keep the 2023 rows that have a CPI score, ascending so the lowest score is drawn at the bottom.\n",
        "is_2023 = df['year'] == 2023\n",
        "df_2023 = df.loc[is_2023].dropna(subset=['cpi_score']).sort_values('cpi_score')\n",
        "\n",
        "# One colour per bar, sampled evenly along RdYlGn by rank (not by score value).\n",
        "bar_colors = plt.cm.RdYlGn(np.linspace(0.1, 0.9, len(df_2023)))\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(10, 8))\n",
        "ax.barh(df_2023['country'], df_2023['cpi_score'], color=bar_colors)\n",
        "# Dashed reference line at the midpoint of the 0-100 scale.\n",
        "ax.axvline(x=50, color='gray', linestyle='--', alpha=0.5)\n",
        "ax.set_xlabel('CPI Score (0-100)')\n",
        "ax.set_title('Corruption Perceptions Index in Latin America, 2023', fontweight='bold')\n",
        "\n",
        "fig.tight_layout()\n",
        "fig.savefig('py_fig1_cpi_2023.png', dpi=300, bbox_inches='tight')\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Figure 2: CPI Heatmap\n",
        "# Rows are countries, columns are years; pivot_table averages any duplicate country-year entries.\n",
        "cpi_matrix = df.pivot_table(values='cpi_score', index='country', columns='year')\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(10, 10))\n",
        "# Colour scale is pinned to the full 0-100 range rather than the observed min/max.\n",
        "heatmap_style = dict(annot=True, fmt='.1f', cmap='RdYlGn', vmin=0, vmax=100)\n",
        "sns.heatmap(cpi_matrix, ax=ax, **heatmap_style)\n",
        "ax.set_title('CPI Score Heatmap: Latin America (2020-2023)', fontweight='bold')\n",
        "\n",
        "fig.tight_layout()\n",
        "fig.savefig('py_fig2_cpi_heatmap.png', dpi=300, bbox_inches='tight')\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Figure 3: Tax Evasion vs CPI\n",
        "# 2023 rows where both plotted variables are present.\n",
        "scatter_cols = ['cpi_score', 'tax_evasion_gdp_pct']\n",
        "df_s = df[df['year'] == 2023].dropna(subset=scatter_cols)\n",
        "cpi, evasion = df_s['cpi_score'], df_s['tax_evasion_gdp_pct']\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(10, 7))\n",
        "# Marker colour repeats the y-value (tax evasion) on the YlOrRd scale.\n",
        "sc = ax.scatter(cpi, evasion, c=evasion, cmap='YlOrRd', s=100)\n",
        "\n",
        "# Label every point with its country, nudged 5 pt up and to the right of the marker.\n",
        "for name, x, y in zip(df_s['country'], cpi, evasion):\n",
        "    ax.annotate(name, (x, y), xytext=(5, 5), textcoords='offset points', fontsize=8)\n",
        "\n",
        "fig.colorbar(sc, ax=ax, label='Tax Evasion % GDP')\n",
        "ax.set_xlabel('CPI Score')\n",
        "ax.set_ylabel('Tax Evasion (% GDP)')\n",
        "ax.set_title('Tax Evasion vs Corruption, 2023', fontweight='bold')\n",
        "\n",
        "fig.tight_layout()\n",
        "fig.savefig('py_fig3_tax_vs_cpi.png', dpi=300, bbox_inches='tight')\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Figure 4: ML Risk Distribution\n",
        "# Counts the rows of df at each money-laundering risk level, across all years.\n",
        "ml_order = ['Low','Medium','High','Very High']\n",
        "ml_col = {'Low':'#1a9850','Medium':'#fee08b','High':'#f46d43','Very High':'#d73027'}\n",
        "\n",
        "# value_counts() skips missing values by default, so no explicit dropna is needed.\n",
        "observed = df['ml_risk_level'].value_counts()\n",
        "# reindex() would silently discard labels outside ml_order; report them instead.\n",
        "unexpected = [lvl for lvl in observed.index if lvl not in ml_order]\n",
        "if unexpected:\n",
        "    print('Warning: ml_risk_level values not plotted:', unexpected)\n",
        "# Fixed Low -> Very High ordering; levels that never occur are shown with a count of 0.\n",
        "counts = observed.reindex(ml_order, fill_value=0)\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(8,6))\n",
        "bars = ax.bar(counts.index, counts.values, color=[ml_col[k] for k in counts.index])\n",
        "# Print each count just above its bar.\n",
        "for b, v in zip(bars, counts.values):\n",
        "    ax.text(b.get_x()+b.get_width()/2, b.get_height()+0.3, str(v), ha='center', fontweight='bold')\n",
        "ax.set_xlabel('Risk level')\n",
        "ax.set_ylabel('Number of observations')\n",
        "ax.set_title('Money Laundering Risk Level (2020-2023)', fontweight='bold')\n",
        "plt.tight_layout()\n",
        "plt.savefig('py_fig4_ml_risk.png', dpi=300, bbox_inches='tight')\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Figure 5: Homicide Rate by Country\n",
        "# 2023 rows with a homicide rate, ascending so the highest rate ends up at the top.\n",
        "rows_2023 = df.loc[df['year'] == 2023]\n",
        "df_hom = rows_2023.dropna(subset=['homicide_rate']).sort_values('homicide_rate')\n",
        "\n",
        "# One colour per bar, sampled evenly from the Reds colormap by rank.\n",
        "shade_positions = np.linspace(0.3, 0.9, len(df_hom))\n",
        "colors_h = plt.cm.Reds(shade_positions)\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(10, 8))\n",
        "ax.barh(df_hom['country'], df_hom['homicide_rate'], color=colors_h)\n",
        "ax.set_xlabel('Homicide Rate per 100,000')\n",
        "ax.set_title('Homicide Rate in Latin America, 2023\\n(Proxy for Organized Crime)', fontweight='bold')\n",
        "\n",
        "fig.tight_layout()\n",
        "fig.savefig('py_fig5_homicide.png', dpi=300, bbox_inches='tight')\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Figure 6: Fraud Rate by Country\n",
        "# 2023 rows with a fraud percentage, ascending so the highest value ends up at the top.\n",
        "fraud_col = 'fraud_companies_pct'\n",
        "rows_2023 = df.loc[df['year'] == 2023]\n",
        "df_fr = rows_2023.dropna(subset=[fraud_col]).sort_values(fraud_col)\n",
        "\n",
        "# One colour per bar, sampled evenly from the Oranges colormap by rank.\n",
        "shade_positions = np.linspace(0.3, 0.9, len(df_fr))\n",
        "colors_f = plt.cm.Oranges(shade_positions)\n",
        "\n",
        "fig, ax = plt.subplots(figsize=(10, 8))\n",
        "ax.barh(df_fr['country'], df_fr[fraud_col], color=colors_f)\n",
        "ax.set_xlabel('Companies Affected by Fraud (%)')\n",
        "ax.set_title('Fraud & Scams Rate in Latin America, 2023', fontweight='bold')\n",
        "\n",
        "fig.tight_layout()\n",
        "fig.savefig('py_fig6_fraud.png', dpi=300, bbox_inches='tight')\n",
        "plt.show()\n",
        "print('All 6 Python figures generated.')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Summary\n\nThis notebook generates 6 figures:\n1. CPI by country (2023)\n2. CPI heatmap (country × year)\n3. Tax evasion vs CPI scatter (2023)\n4. Money laundering risk distribution\n5. Homicide rate by country\n6. Fraud rate by country\n\n**Citation:** de la Serna (2026). DOI: 10.7910/DVN/8FXZOJ\n"
      ]
    }
  ]
}